{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyMSpXoH/El/9Vgw5TD3bpx/",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Huangjian2013/ai-demo/blob/main/rag/11-LlamaIndex-pdf-rag.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 67,
      "metadata": {
        "id": "N7rbJ7CbVUC8"
      },
      "outputs": [],
      "source": [
        "!pip install llama-index --quiet\n",
        "!pip install faiss-cpu --quiet\n",
        "!pip install llama-index-vector-stores-faiss --quiet\n",
        "!pip install llama-index-embeddings-openai --quiet\n",
        "!pip install openai --quiet\n",
        "!pip install tiktoken --quiet"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import userdata\n",
        "from llama_index.core import Settings\n",
        "from llama_index.core import SimpleDirectoryReader,VectorStoreIndex\n",
        "from llama_index.vector_stores.faiss import FaissVectorStore\n",
        "from llama_index.embeddings.openai import OpenAIEmbedding\n",
        "from llama_index.core.text_splitter import SentenceSplitter\n",
        "from llama_index.core.ingestion import IngestionPipeline\n",
        "import faiss\n",
        "import openai"
      ],
      "metadata": {
        "id": "pBXoK4AOXiZU"
      },
      "execution_count": 68,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "dimensions_size = 512\n",
        "openai.api_key = userdata.get('REAL_OPENAI_KEY')\n",
        "Settings.embedding_model = OpenAIEmbedding(dimensions = dimensions_size)"
      ],
      "metadata": {
        "id": "8JvzBp_2JpIC"
      },
      "execution_count": 69,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "node_reader = SimpleDirectoryReader(input_files=[\"sample_data/极越01.pdf\"])\n",
        "documents = node_reader.load_data()"
      ],
      "metadata": {
        "id": "MsfAdO0tL8IV"
      },
      "execution_count": 70,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "faiss_index = faiss.IndexFlatL2(dimensions_size)\n",
        "vector_store = FaissVectorStore(faiss_index=faiss_index)"
      ],
      "metadata": {
        "id": "09RLMmiGOdCr"
      },
      "execution_count": 71,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "text_splitter = SentenceSplitter(chunk_size=200)\n",
        "pipeline = IngestionPipeline(\n",
        "    transformations=[text_splitter],\n",
        "    vector_store=vector_store,\n",
        ")"
      ],
      "metadata": {
        "id": "P55WLkafQAMj"
      },
      "execution_count": 72,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "notes = pipeline.run(documents=documents)"
      ],
      "metadata": {
        "id": "dM-rQBy6Qpa_"
      },
      "execution_count": 73,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "vector_store_index = VectorStoreIndex(notes)\n",
        "retrievers = vector_store_index.as_retriever(similarity_top_k=1)"
      ],
      "metadata": {
        "id": "3cleckTgRG2E"
      },
      "execution_count": 74,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "context = retrievers.retrieve(\"极越01的型号尺寸是多少？\")\n",
        "for i in context:\n",
        "  print(\"-----------------------------\")\n",
        "  print(i.node.get_text())"
      ],
      "metadata": {
        "id": "xdkWF2j-RWnT",
        "outputId": "b0dbb242-e271-46fd-d731-120378f5ed7c",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 75,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "-----------------------------\n",
            "[22] \n",
            "外观方面  \n",
            "播报 \n",
            "编辑 \n",
            "外观方面，极越 01采用了自主的设计理念，其长宽高分别为 4853/1990/1611 毫米，轴距为\n",
            "3000毫米。 [16] \n",
            "内饰方面  \n",
            "播报 \n",
            "编辑 \n",
            "进入车内，极简的设计风格给人以概念车的既视感。一块 35.6英寸6k超清一体屏悬浮于中\n",
            "控台之上，丰富的功能选项大量简化物理按键，包括可用语音操作的 3D智驾，营造科幻座\n",
            "舱氛围。  [15] \n",
            "价格方面  \n",
            " \n",
            " \n",
            "极越01峰值功率 400kW、零百加速不到 4s、最大续航里程超 700km。\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "query_engine = vector_store_index.as_query_engine()\n",
        "response = query_engine.query(\"极越01的型号尺寸是多少？\")\n",
        "print(response)\n"
      ],
      "metadata": {
        "id": "bJXy5a50T-53",
        "outputId": "7c1f4933-c6a2-45f0-e0f0-e8c5c8d837e4",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": 76,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "4853/1990/1611 毫米\n"
          ]
        }
      ]
    }
  ]
}